machine="mythinkpad" # define the machine we work on
loadALL = FALSE # only load CpG shared by half fish per trt group
loadannot = TRUE # load genome annotations
sourceDMS = TRUE # Load the calculated DMS
source("R02.3_DATALOAD.R")
## Warning: replacing previous import 'Biostrings::pattern' by 'grid::pattern' when
## loading 'genomation'
library(knitr)
knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
message = FALSE, cache.lazy = FALSE, cache.path = "../../gitignore/RmdCache/", # keep heavy data in gitignore cache
fig.path = "Rfigures/Rmd/")
## Features Annotation (use package genomation v1.24.0)
## NB Promoters are defined by options at genomation::readTranscriptFeatures function.
## The default option is to take -1000,+1000bp around the TSS and you can change that.
## -> following Heckwolf 2020 and Sagonas 2020, we consider 1500bp upstream and 500 bp downstream
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
par(mfrow=c(1,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
## Parents comparison:
diffAnn_PAR = annotateWithGeneParts(as(DMSlist$DMS_15pc_G1_C_T,"GRanges"),annotBed12)
diffAnn_PAR
## promoter exon intron intergenic
## 8.58 15.84 34.13 45.72
## promoter exon intron intergenic
## 8.58 13.19 32.51 45.72
## promoter exon intron
## 1.07 0.19 0.44
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3020 8128 15141 19226 300247
genomation::plotTargetAnnotation(diffAnn_PAR,precedence=TRUE, main="DMS G1", cex.legend = 1, border="white")
## Offspring from control parents comparison:
diffAnn_G2_controlG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_CC_CT,"GRanges"),annotBed12)
diffAnn_G2_controlG1
## promoter exon intron intergenic
## 11.28 21.05 30.16 44.44
## promoter exon intron intergenic
## 11.28 16.21 28.07 44.44
## promoter exon intron
## 0.38 0.07 0.14
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11 2602 6295 14212 15906 261017
genomation::plotTargetAnnotation(diffAnn_G2_controlG1,precedence=TRUE, main="DMS G2-G1c", cex.legend = 1, border="white")
## Offspring from infected parents comparison:
diffAnn_G2_infectedG1 = annotateWithGeneParts(as(DMSlist$DMS_15pc_TC_TT,"GRanges"),annotBed12)
diffAnn_G2_infectedG1
## promoter exon intron intergenic
## 12.90 13.62 34.64 46.81
## promoter exon intron intergenic
## 12.90 9.42 30.87 46.81
## promoter exon intron
## 0.28 0.03 0.08
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9 2355 7150 12285 14773 109527
genomation::plotTargetAnnotation(diffAnn_G2_infectedG1,precedence=TRUE, main="DMS G2-G1i", cex.legend = 1, border="white")
par(mfrow=c(1,1))
## Run the function to get DMS info
DMS_info_G1 <- myDMSinfo(DMSlist$DMS_15pc_G1_C_T, uniteCov6_G1_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1c_final <- myDMSinfo(DMSlist$DMS_15pc_CC_CT, uniteCov14_G2_woSexAndUnknowChrOVERLAP)
DMS_info_G2_G1i_final <- myDMSinfo(DMSlist$DMS_15pc_TC_TT,uniteCov14_G2_woSexAndUnknowChrOVERLAP)
NB Kostas’ results: “We found a total of 1,973 CpG sites out of 1,172,887 CpGs (0.17%) across the genome that showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish”
Here: we obtain out a total of 1001880 CpG sites: 3648 (0.36%) showed at least 15% differential fractional methylation (differentially methylated site [DMS]; q < 0.01) between infected and uninfected fish in the parental group; 1197 (0.12%) in the offspring from control parents comparison; 690 (0.07%) in the offspring from infected parents comparison.
## Chi2 test: are the number of DMS from G2-G1C and G2-G1I overlapping with DMSpar statistically different?
A=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1c_final$DMS))
B=length(DMS_info_G2_G1c_final$DMS)
C=length(intersect(DMS_info_G1$DMS,DMS_info_G2_G1i_final$DMS))
D=length(DMS_info_G2_G1i_final$DMS)
Observed=matrix(c(A, B-A, C, D-C),nrow=2)
Observed
## [,1] [,2]
## [1,] 106 59
## [2,] 1091 631
chisq.test(Observed)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: Observed
## X-squared = 0.019909, df = 1, p-value = 0.8878
## not statistically different
## output Venn diagrams
allVenn <- ggVennDiagram(list("DMS G1" = DMS_info_G1$DMS, "DMS G2-c" = DMS_info_G2_G1c_final$DMS, "DMS G2-i" = DMS_info_G2_G1i_final$DMS), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
hypoVenn <- ggVennDiagram(list("DMS G1\nhypo" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hypo"],
"DMS G2-c\nhypo" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hypo"],
"DMS G2-i\nhypo" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hypo"]), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
hyperVenn <- ggVennDiagram(list("DMS G1\nhyper" = DMS_info_G1$DMS[DMS_info_G1$direction %in% "hyper"],
"DMS G2-c\nhyper" = DMS_info_G2_G1c_final$DMS[DMS_info_G2_G1c_final$direction %in% "hyper"],
"DMS G2-i\nhyper" = DMS_info_G2_G1i_final$DMS[DMS_info_G2_G1i_final$direction %in% "hyper"]), label_alpha = 0) +
scale_fill_gradient(low="white",high = "red")
ggarrange(allVenn,
ggarrange(hypoVenn, hyperVenn, ncol = 2, legend = "none"),
nrow = 2, widths = c(.5,1))
runHyperHypoAnnot <- function(){
par(mfrow=c(2,3))
par(mar = c(.1,0.1,5,0.1)) # Set the margin on all sides to 2
####### HYPO
## Parents comparison:
A = annotateWithGeneParts(
as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(A,precedence=TRUE, main="DMS G1\nhypo",
cex.legend = .4, border="white")
## Offspring from control parents comparison:
B = annotateWithGeneParts(
as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(B,precedence=TRUE, main="DMS G2-G1c\nhypo",
cex.legend = .4, border="white")
## Offspring from infected parents comparison:
C = annotateWithGeneParts(
as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(C,precedence=TRUE, main="DMS G2-G1i\nhypo",
cex.legend = .4, border="white")
####### HYPER
## Parents comparison:
D = annotateWithGeneParts(
as(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(D,precedence=TRUE, main="DMS G1\nhyper",
cex.legend = .4, border="white")
## Offspring from control parents comparison:
E = annotateWithGeneParts(
as(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(E,precedence=TRUE, main="DMS G2-G1c\nhyper",
cex.legend = .4, border="white")
## Offspring from infected parents comparison:
f = annotateWithGeneParts(
as(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],"GRanges"),annotBed12)
genomation::plotTargetAnnotation(f,precedence=TRUE, main="DMS G2-G1i\nhyper",
cex.legend = .4, border="white")
par(mfrow=c(1,1))
return(list(G1hypo=A, G2G1chypo=B, G2G1ihypo=C, G1hyper=D, G2G1chyper=E, G2G1ihyper=f))
}
myannot=runHyperHypoAnnot()
############################################################
## Venn diagram of overlapping features by their annotation:
table(rowSums(as.data.frame(myannot$G1hypo@members))) # NB: some positions are labelled with several features!
##
## 0 1 2 3
## 559 566 49 2
## as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"
myAnnotateDMS <- function(DMS, annot){
## sanity check
if (nrow(DMS) != nrow(annot)){"STOP error in arguments"}
DMS$pos <- paste(DMS$chr, DMS$start, DMS$end)
## NB as in MBE 2021: "giving precedence to the following order promoters, exons,
## introns, and intergenic regions when features overlapped"
DMS$feature <- NA
## 1. promoters
DMS$feature[which(annot$prom == 1)] = "promoter"
## 2. exons
DMS$feature[which(annot$exon == 1 & annot$prom ==0)] = "exon"
## 3. intron
DMS$feature[which(annot$intro == 1 & annot$exon == 0 & annot$prom ==0)] = "intron"
## 4. intergenic regions
DMS$feature[which(annot$intro == 0 & annot$exon == 0 & annot$prom ==0)] = "intergenic"
return(DMS)
}
DMSlist$DMS_15pc_G1_C_T = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T, as.data.frame(diffAnn_PAR@members))
DMSlist$DMS_15pc_G1_C_T_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hypo",],
as.data.frame(myannot$G1hypo@members))
DMSlist$DMS_15pc_G1_C_T_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_G1_C_T[DMS_info_G1$direction %in% "hyper",],
as.data.frame(myannot$G1hyper@members))
DMSlist$DMS_15pc_CC_CT = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT, as.data.frame(diffAnn_G2_controlG1@members))
DMSlist$DMS_15pc_CC_CT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hypo",],
as.data.frame(myannot$G2G1chypo@members))
DMSlist$DMS_15pc_CC_CT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_CC_CT[DMS_info_G2_G1c_final$direction %in% "hyper",],
as.data.frame(myannot$G2G1chyper@members))
DMSlist$DMS_15pc_TC_TT = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT, as.data.frame(diffAnn_G2_infectedG1@members))
DMSlist$DMS_15pc_TC_TT_HYPO = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hypo",],
as.data.frame(myannot$G2G1ihypo@members))
DMSlist$DMS_15pc_TC_TT_HYPER = myAnnotateDMS(DMSlist$DMS_15pc_TC_TT[DMS_info_G2_G1i_final$direction %in% "hyper",],
as.data.frame(myannot$G2G1ihyper@members))
## Make Venn diagram for each feature
getFeatureDFHYPO <- function(myfeat){
a = DMSlist$DMS_15pc_G1_C_T_HYPO$pos[DMSlist$DMS_15pc_G1_C_T_HYPO$feature %in% myfeat]
b = DMSlist$DMS_15pc_CC_CT_HYPO$pos[DMSlist$DMS_15pc_CC_CT_HYPO$feature %in% myfeat]
c = DMSlist$DMS_15pc_TC_TT_HYPO$pos[DMSlist$DMS_15pc_TC_TT_HYPO$feature %in% myfeat]
return(list(a=a,b=b,c=c))
}
getFeatureDFHYPER <- function(myfeat){
a = DMSlist$DMS_15pc_G1_C_T_HYPER$pos[DMSlist$DMS_15pc_G1_C_T_HYPER$feature %in% myfeat]
b = DMSlist$DMS_15pc_CC_CT_HYPER$pos[DMSlist$DMS_15pc_CC_CT_HYPER$feature %in% myfeat]
c = DMSlist$DMS_15pc_TC_TT_HYPER$pos[DMSlist$DMS_15pc_TC_TT_HYPER$feature %in% myfeat]
return(list(a=a,b=b,c=c))
}
getVenn <- function(feat, direction){
if (direction == "hypo"){
ggVennDiagram(list(A = getFeatureDFHYPO(feat)[["a"]],
B = getFeatureDFHYPO(feat)[["b"]],
C = getFeatureDFHYPO(feat)[["c"]]), label_alpha = 0,
category.names = c(paste0("DMS G1\nhypo\n", feat), paste0("DMS G2-c\nhypo\n", feat), paste0("DMS G2-i\nhypo\n", feat))) +
scale_fill_gradient(low="white",high = "red")
} else if (direction == "hyper"){
ggVennDiagram(list(A = getFeatureDFHYPER(feat)[["a"]],
B = getFeatureDFHYPER(feat)[["b"]],
C = getFeatureDFHYPER(feat)[["c"]]), label_alpha = 0,
category.names = c(paste0("DMS G1\nhyper\n", feat), paste0("DMS G2-c\nhyper\n", feat), paste0("DMS G2-i\nhyper\n", feat))) +
scale_fill_gradient(low="white",high = "red")
}
}
ggarrange(getVenn("promoter", "hypo"), getVenn("exon", "hypo"),
getVenn("intron", "hypo"), getVenn("intergenic", "hypo"),
nrow = 2, ncol = 2)
ggarrange(getVenn("promoter", "hyper"), getVenn("exon", "hyper"),
getVenn("intron", "hyper"), getVenn("intergenic", "hyper"),
nrow = 2, ncol = 2)
## Parents trt-ctrl
# load annotation
annot_PAR <- as.data.frame(diffAnn_PAR@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T, annotFile = annot_PAR, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")
## G2-G1c trt-ctrl
# load annotation
annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT, annotFile = annot_G2_G1c, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")
## G2-G1i trt-ctrl
# load annotation
annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT, annotFile = annot_G2_G1i, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")
## Outliers in Manhattan plot: 15% diff + 2SD
outliers_G1_final <- which(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_G1_C_T$meth.diff)))
outliers_annot_G1 <- as.data.frame(diffAnn_PAR@members)[outliers_G1_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_G1_C_T[outliers_G1_final, ],
annotFile = outliers_annot_G1, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G1 DMS")
outliers_G2_G1c_final <- which(abs(DMSlist$DMS_15pc_CC_CT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_CC_CT$meth.diff)))
outliers_annot_G2_G1c <- as.data.frame(diffAnn_G2_controlG1@members)[outliers_G2_G1c_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_CC_CT[outliers_G2_G1c_final, ],
annotFile = outliers_annot_G2_G1c, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1c DMS")
outliers_G2_G1i_final <- which(abs(DMSlist$DMS_15pc_TC_TT$meth.diff) > 15 + 2*sd(abs(DMSlist$DMS_15pc_TC_TT$meth.diff)))
outliers_annot_G2_G1i <- as.data.frame(diffAnn_G2_infectedG1@members)[outliers_G2_G1i_final,]
makeManhattanPlots(DMSfile = DMSlist$DMS_15pc_TC_TT[outliers_G2_G1i_final, ],
annotFile = outliers_annot_G2_G1i, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = "Manhattan plot of G2-G1i DMS")
TBC To be continued
## All in DMSBPlist
#
# ## Add fam to bp
# names(DMSBPlist) <- paste0(plyr::join(data.frame(BP = names(DMSBPlist)),
# unique(data.frame(BP = fullMetadata_OFFS$brotherPairID,
# fam = fullMetadata_OFFS$Family)))[[2]], "_", names(DMSBPlist))
## Extract DMS (by position)
myPosList = lapply(DMSBPlist, lapply, function(x){paste(x$chr, x$end)})
## Find DMS present in at least 4 BP out of 8 (half):
get2keep = function(Compa){
x <- lapply(myPosList, function(x){unlist(x[[paste0("DMS_15pc_BP_", Compa)]])})
f <- table(unlist((x))) # each DMS present between 1 and 8 times
tokeep <- names(f)[f >= 4]
print(length(tokeep))
## Keep the DMS present in 4 families minimum
DMSBPlist_INTER4 <- lapply(x, function(x){x[x %in% tokeep]})
## Reorder by family:
DMSBPlist_INTER4 <- DMSBPlist_INTER4[names(DMSBPlist_INTER4)[order(names(DMSBPlist_INTER4))]]
return(DMSBPlist_INTER4)
}
## Prepare df for complexUpset
getUpsetDF = function(i){ # for a given comparison
A = get2keep(vecCompa[i])
A2 = lapply(A, function(x){
x = data.frame(x) # vector of DMS as df
names(x) = "DMS" # name each CpG
return(x)
})
## Add BP name
for (i in 1:length(names(A2))){
A2[[i]]["BP"] = names(A2)[i]
}
# make a dataframe
A2 = A2 %>% reduce(full_join, by = "DMS")
# names column with BP id
for (i in 2:ncol(A2)) {names(A2)[i] = unique(A2[!is.na(A2[i]), i])}
# replace by 0 or 1 the DMS absence/presence
A = data.frame(apply(A2[2:9], 2, function(x) ifelse(is.na(x), 0, 1)))
# add DMS
A$DMS = A2$DMS
return(A)
}
## Vector of all 4 comparisons
vecCompa <- c("CC_TC", "CT_TT", "CC_CT", "TC_TT")
## Make upset plots
for (i in 1:4){
df = getUpsetDF(i)
print(ComplexUpset::upset(
df,
names(df)[1:8],
width_ratio=0.1,
sort_intersections_by=c('degree', 'cardinality'),
queries=query_by_degree(
df, names(df)[1:8],
params_by_degree=list(
'1'=list(color='red', fill='red'),
'2'=list(color='purple', fill='purple'),
'3'=list(color='blue', fill='blue'),
'4'=list(color='grey', fill='grey'),
'5'=list(color='red', fill='red'),
'6'=list(color='purple', fill='purple'),
'7'=list(color='blue', fill='blue'),
'8'=list(color='green', fill='green')
),
only_components=c("intersections_matrix", "Intersection size")
)))
}
## [1] 1099
## [1] 1257
## [1] 329
## [1] 341
plotManhattanGenes <- function(i){
DMSvec = unique(unlist(get2keep(vecCompa[i])))
# Annotate the DMS present in at least 4 BP:
# Change the vector into a methobject:
df <- data.frame(chr=sapply(strsplit(DMSvec, " "), `[`, 1),
start=sapply(strsplit(DMSvec, " "), `[`, 2),
end=sapply(strsplit(DMSvec, " "), `[`, 2))
# get annotation
anot4BP <- getAnnotationFun(makeGRangesFromDataFrame(df))
length(anot4BP) # number of genes
# Reorder chromosome
anot4BP <- anot4BP %>%
mutate(chr = gsub("Gy_chr", "", seqid), chrom_nr = seqid %>% deroman(), chrom_order=factor(chrom_nr) %>% as.numeric()) %>% arrange(chrom_order) %>%
mutate(geneLengthkb = (end - start)/1000, nCpGperGenekb = nCpG/geneLengthkb)
# Plot
plot = ggplot(anot4BP, aes(x=start, y = nCpGperGenekb)) + geom_point(alpha=.4) +
facet_grid(.~fct_inorder(chr)) +
geom_hline(yintercept = 1)+
theme(axis.text.x=element_blank()) +
xlab(paste0("Genes with DMS present in at least 4 brother pairs\nComparison: ", vecCompa[i])) +
ylab("Number of differentially methylated CpG per gene kb")
## Genes with at least 1 CpG differentially methylated per kb:
topGenes = anot4BP[anot4BP$nCpGperGenekb >= 1,]
return(list(plot = plot, anot4BP = anot4BP, topGenes = topGenes))
}
plotManhattanGenes(1)$plot
## [1] 1099
plotManhattanGenes(2)$plot
## [1] 1257
plotManhattanGenes(3)$plot
## [1] 329
plotManhattanGenes(4)$plot
## [1] 341
[1] 1099
| comparison | chr | gene |
|---|---|---|
| CC_TC | I | Similar to mfsd1: Major facilitator superfamily domain-containing protein 1 (Danio rerio OX=7955) |
| CC_TC | I | Similar to Lim2: Lens fiber membrane intrinsic protein (Rattus norvegicus OX=10116) |
| CC_TC | III | Similar to SELENOF: Selenoprotein F (Homo sapiens OX=9606) |
| CC_TC | IV | Similar to MAP1LC3C: Microtubule-associated proteins 1A/1B light chain 3C (Homo sapiens OX=9606) |
| CC_TC | IV | Protein of unknown function |
| CC_TC | V | Similar to PRSS12: Neurotrypsin (Trachypithecus phayrei OX=61618) |
| CC_TC | V | Similar to hba2: Hemoglobin subunit alpha-2 (Anarhichas minor OX=65739) |
| CC_TC | VI | Similar to PROKR1: Prokineticin receptor 1 (Bos taurus OX=9913) |
| CC_TC | VI | Similar to CIAO2B: Cytosolic iron-sulfur assembly component 2B (Homo sapiens OX=9606) |
| CC_TC | VII | Protein of unknown function |
| CC_TC | VII | Similar to Kcnj6: G protein-activated inward rectifier potassium channel 2 (Mus musculus OX=10090) |
| CC_TC | VIII | Similar to GTF2IRD2: General transcription factor II-I repeat domain-containing protein 2A (Homo sapiens OX=9606) |
| CC_TC | IX | Similar to DGKE: Diacylglycerol kinase epsilon (Homo sapiens OX=9606) |
| CC_TC | IX | Protein of unknown function |
| CC_TC | X | Similar to Type-4 ice-structuring protein LS-12 (Myoxocephalus octodecemspinosus OX=68557) |
| CC_TC | X | Similar to kcnk9: Potassium channel subfamily K member 9 (Xenopus laevis OX=8355) |
| CC_TC | XI | Similar to CDC42EP1: Cdc42 effector protein 1 (Homo sapiens OX=9606) |
| CC_TC | XII | Similar to Tafa1: Chemokine-like protein TAFA-1 (Mus musculus OX=10090) |
| CC_TC | XII | Similar to Npbwr1: Neuropeptides B/W receptor type 1 (Mus musculus OX=10090) |
| CC_TC | XIV | Similar to rasgef1bb: Ras-GEF domain-containing family member 1B-B (Danio rerio OX=7955) |
| CC_TC | XVI | Similar to Me3: NADP-dependent malic enzyme, mitochondrial (Mus musculus OX=10090) |
| CC_TC | XVI | Protein of unknown function |
| CC_TC | XVIII | Similar to bmp2: Bone morphogenetic protein 2 (Tetraodon nigroviridis OX=99883) |
| CC_TC | XVIII | Similar to YY1: Transcriptional repressor protein YY1 (Homo sapiens OX=9606) |
| CC_TC | XVIII | Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913) |
| CC_TC | XVIII | Similar to rsph9: Radial spoke head protein 9 homolog (Danio rerio OX=7955) |
| CC_TC | XVIII | Similar to TENT5A: Terminal nucleotidyltransferase 5A (Homo sapiens OX=9606) |
| CC_TC | XX | Similar to MAG: Myelin-associated glycoprotein (Homo sapiens OX=9606) |
| CC_TC | XX | Similar to vim: Vimentin (Oncorhynchus mykiss OX=8022) |
| CC_TC | XXI | Protein of unknown function |
[1] 1257
| comparison | chr | gene |
|---|---|---|
| CT_TT | I | Similar to OR11A1: Olfactory receptor 11A1 (Homo sapiens OX=9606) |
| CT_TT | I | Protein of unknown function |
| CT_TT | II | Similar to Hapln3: Hyaluronan and proteoglycan link protein 3 (Mus musculus OX=10090) |
| CT_TT | III | Similar to MNCb-2990: Uncharacterized protein C14orf119 homolog (Mus musculus OX=10090) |
| CT_TT | III | Protein of unknown function |
| CT_TT | IV | Similar to Ltc4s: Leukotriene C4 synthase (Rattus norvegicus OX=10116) |
| CT_TT | IV | Similar to ZNF518B: Zinc finger protein 518B (Homo sapiens OX=9606) |
| CT_TT | IV | Similar to MAP1LC3C: Microtubule-associated proteins 1A/1B light chain 3C (Homo sapiens OX=9606) |
| CT_TT | IV | Similar to CHRNA9: Neuronal acetylcholine receptor subunit alpha-9 (Gallus gallus OX=9031) |
| CT_TT | IV | Similar to cdpf1: Cysteine-rich DPF motif domain-containing protein 1 (Xenopus laevis OX=8355) |
| CT_TT | V | Similar to hba2: Hemoglobin subunit alpha-2 (Anarhichas minor OX=65739) |
| CT_TT | VI | Similar to NANP: N-acylneuraminate-9-phosphatase (Homo sapiens OX=9606) |
| CT_TT | VI | Similar to CIAO2B: Cytosolic iron-sulfur assembly component 2B (Homo sapiens OX=9606) |
| CT_TT | VIII | Protein of unknown function |
| CT_TT | IX | Similar to Fhdc1: FH2 domain-containing protein 1 (Mus musculus OX=10090) |
| CT_TT | IX | Similar to LGALSL: Galectin-related protein (Gallus gallus OX=9031) |
| CT_TT | IX | Similar to eif4a3: Eukaryotic initiation factor 4A-III (Danio rerio OX=7955) |
| CT_TT | X | Similar to Type-4 ice-structuring protein LS-12 (Myoxocephalus octodecemspinosus OX=68557) |
| CT_TT | XI | Similar to hoxb9a: Homeobox protein Hox-B9a (Takifugu rubripes OX=31033) |
| CT_TT | XI | Similar to fzd2: Frizzled-2 (Xenopus laevis OX=8355) |
| CT_TT | XI | Similar to ELFN2: Protein phosphatase 1 regulatory subunit 29 (Homo sapiens OX=9606) |
| CT_TT | XI | Protein of unknown function |
| CT_TT | XII | Similar to MTG2: Mitochondrial ribosome-associated GTPase 2 (Pongo abelii OX=9601) |
| CT_TT | XII | Similar to RIBC1: RIB43A-like with coiled-coils protein 1 (Bos taurus OX=9913) |
| CT_TT | XII | Similar to rpl10a: 60S ribosomal protein L10a (Ictalurus punctatus OX=7998) |
| CT_TT | XII | Similar to MAPK13: Mitogen-activated protein kinase 13 (Bos taurus OX=9913) |
| CT_TT | XIV | Similar to S100Z: Protein S100-Z (Homo sapiens OX=9606) |
| CT_TT | XIV | Protein of unknown function |
| CT_TT | XVII | Similar to FAM107B: Protein FAM107B (Homo sapiens OX=9606) |
| CT_TT | XVIII | Similar to Opn5: Opsin-5 (Mus musculus OX=10090) |
| CT_TT | XVIII | Similar to FKBP3: Peptidyl-prolyl cis-trans isomerase FKBP3 (Bos taurus OX=9913) |
| CT_TT | XXI | Similar to Phex: Phosphate-regulating neutral endopeptidase PHEX (Mus musculus OX=10090) |
| CT_TT | XXI | Similar to mc4r: Melanocortin receptor 4 (Danio rerio OX=7955) |
| CT_TT | XXI | Similar to acbd5a: Acyl-CoA-binding domain-containing protein 5A (Danio rerio OX=7955) |
[1] 329
| comparison | chr | gene |
|---|---|---|
| CC_CT | I | Similar to OR11A1: Olfactory receptor 11A1 (Homo sapiens OX=9606) |
| CC_CT | IV | Similar to MAP1LC3C: Microtubule-associated proteins 1A/1B light chain 3C (Homo sapiens OX=9606) |
| CC_CT | IV | Protein of unknown function |
| CC_CT | IV | Similar to cdpf1: Cysteine-rich DPF motif domain-containing protein 1 (Xenopus laevis OX=8355) |
| CC_CT | V | Similar to TRIM16: E3 ubiquitin-protein ligase TRIM16 (Pongo abelii OX=9601) |
| CC_CT | V | Similar to hba2: Hemoglobin subunit alpha-2 (Anarhichas minor OX=65739) |
| CC_CT | VI | Similar to CIAO2B: Cytosolic iron-sulfur assembly component 2B (Homo sapiens OX=9606) |
| CC_CT | XI | Similar to EPN3: Epsin-3 (Homo sapiens OX=9606) |
| CC_CT | XIII | Similar to plpp5: Phospholipid phosphatase 5 (Xenopus laevis OX=8355) |
| CC_CT | XVIII | Similar to CHGA: Chromogranin-A (Equus caballus OX=9796) |
| CC_CT | XXI | Similar to NSUN6: tRNA (cytosine(72)-C(5))-methyltransferase NSUN6 (Homo sapiens OX=9606) |
[1] 341
| comparison | chr | gene |
|---|---|---|
| TC_TT | I | Similar to Lim2: Lens fiber membrane intrinsic protein (Rattus norvegicus OX=10116) |
| TC_TT | IV | Similar to ZNF518B: Zinc finger protein 518B (Homo sapiens OX=9606) |
| TC_TT | IV | Similar to MAP1LC3C: Microtubule-associated proteins 1A/1B light chain 3C (Homo sapiens OX=9606) |
| TC_TT | IV | Protein of unknown function |
| TC_TT | V | Similar to ZNF691: Zinc finger protein 691 (Bos taurus OX=9913) |
| TC_TT | VII | Similar to Rnf167: E3 ubiquitin-protein ligase RNF167 (Mus musculus OX=10090) |
| TC_TT | XIV | Similar to Rpl28: 60S ribosomal protein L28 (Mus musculus OX=10090) |
| TC_TT | XVI | Protein of unknown function |
| TC_TT | XVIII | Similar to Mgat2: Alpha-1,6-mannosyl-glycoprotein 2-beta-N-acetylglucosaminyltransferase (Rattus norvegicus OX=10116) |
| TC_TT | XX | Similar to Cdcp1: CUB domain-containing protein 1 (Mus musculus OX=10090) |
# create gene universe
gene_universe <- data.frame(
subsetByOverlaps(GRanges(annotGff3), GRanges(uniteCov14_G2_woSexAndUnknowChrOVERLAP))) %>% # subselect covered CpGs
filter(lengths(Ontology_term)!=0) %>% # rm non existing GO terms
filter(type %in% "gene") %>% # keep all the 7416 genes with GO terms
dplyr::select(c("Name", "Ontology_term")) %>%
mutate(go_linkage_type = "IEA") %>% #NB: IEA but not necessarily true, it's from Interproscan after Maker. Sticklebacks (biomart) have 82701 IEA and 63 ISS.
relocate("Ontology_term","go_linkage_type","Name") %>%
unnest(Ontology_term) %>% # one GO per line (was a list before in this column)
data.frame()
# Create gene set collection
goFrame <- GOFrame(gene_universe, organism="Gasterosteus aculeatus")
goAllFrame <- GOAllFrame(goFrame)
gsc_universe <- GeneSetCollection(goAllFrame, setType = GOCollection())
IMPORTANT NOTE from Mel: why conditional hypergeometric test? The GO ontology is set up as a directed acyclic graph, where a parent term is comprised of all its child terms. If you do a standard hypergeometric, you might e.g., find ‘positive regulation of kinase activity’ to be significant. If you then test ‘positive regulation of catalytic activity’, which is a parent term, then it might be significant as well, but only because of the terms coming from positive regulation of kinase activity.
The conditional hypergeometric takes this into account, and only uses those terms that were not already significant when testing a higher order (parent) term.
makedfGO <- function(i){
anot4BP = plotManhattanGenes(i)$anot4BP
## Create subuniverse:
sub_universe <- gene_universe %>%
subset(gene_universe$Name %in% unlist(anot4BP$Parent))
## Run conditional hypergeometric test:
runTestHypGeom <- function(sub_universe, onto){
## Constructing a GOHyperGParams objects or KEGGHyperGParams objects from a GeneSetCollection
# Use GOEnrich as a wrapper around GOStat for extra FDR comparison
## Does not solve all issues, but better than nothing.
## See: https://support.bioconductor.org/p/5571/
## To do (maybe?) add gene count in internal GOEnrich function
GO_fdr <- joinGOEnrichResults(goEnrichTest(gsc=gsc_universe,
gene.ids = as.vector(unique(sub_universe[["Name"]])),# genes in selected gene set
univ.gene.ids = unique(gene_universe$Name),
ontologies = onto, # A string for GO to use ("BP", "CC", or "MF")
pvalue.cutoff = 0.05,
cond = TRUE, # see note above
test.dir = "over"),# over represented GO terms
p.adjust.method = "fdr")
## Run hypergeometric test:
return(GO_fdr)
}
## TO DO: add extra pFDR correction cf Mel
GO_MF <- runTestHypGeom(sub_universe = sub_universe, onto = "MF")
GO_CC <- runTestHypGeom(sub_universe = sub_universe, onto = "CC")
GO_BP <- runTestHypGeom(sub_universe = sub_universe, onto = "BP")
# Merge the df MP and BP
dfGO = rbind(GO_MF, GO_CC, GO_BP)
dfGO = dfGO %>% mutate(Term = factor(x = GO.term, levels = GO.term),
Comp = factor(x = vecCompa[i], levels = vecCompa[i]))
return(dfGO)
}
## For each comparison:
A=makedfGO(1); B=makedfGO(2); C=makedfGO(3); D=makedfGO(4)
## [1] 1099
## [1] 1257
## [1] 329
## [1] 341
dfGO = rbind(A, B, C, D)
# add type of comparison:
dfGO$group = "Different parental treatment"
dfGO$group[dfGO$Comp %in% c("CC_CT", "TC_TT")] = "Different offpring treatment"
# # to keep the comparison in order:
# dfGO$Comp = factor(dfGO$Comp, levels = unique(dfGO$Comp))
## To save:
# htmlReport(GO_MF_0633, file="../Routput/GO/GO_MF_0633.html")
# write.table(as.data.frame(summary(GO_MF_0633)),"../Routput/GO/GO_MF_0633.txt", sep="\t", row.names = F)
dfGO %>% ggplot(aes(x=Comp, y = factor(GO.name))) +
geom_point(aes(color = p.value.adjusted))+#, size = Count)) +
scale_color_gradient(name="P value", low = "red", high = "blue") +
# scale_size_continuous(name = "Gene number", range = c(1, 10), breaks = c(1,5,15,25))+
theme_bw() + ylab("") + xlab("Treatments comparison") +
facet_grid(fct_inorder(GO.category)~group,scales="free",space = "free")
##############################################################
#### I. Focus on CpG positions at parental (Ctrl-Inf) DMS ####
##############################################################
####################################################################################
#### Question: how are the beta values in the different groups at the parental DMS?##
####################################################################################
##############
## Prepare dataset
##############
PM_G1 <- getPMdataset(uniteCov = uniteCov6_G1_woSexAndUnknowChrOVERLAP, MD = fullMetadata_PAR, gener="parents")
PM_G2 <- getPMdataset(uniteCov = uniteCov14_G2_woSexAndUnknowChrOVERLAP, MD = fullMetadata_OFFS, gener="offspring")
head(PM_G1)
head(PM_G2)
table(fullMetadata_OFFS$trtG1G2, fullMetadata_OFFS$clutch.ID)
## What is the relative contribution of methylation to brother pair & paternal treatment?
## Test of VCA: variance component analysis https://cran.r-project.org/web/packages/VCA/vignettes/VCA_package_vignette.html
## Hypo
PM_G2_mean_hypo <- PM_G2[PM_G2$hypohyper %in% "hypo", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypomethylated upon infection"))
myfitVCA_hypo <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hypo)
### Real values
trtEffect <- sum(myfitVCA_hypo$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hypo$aov.tab[5:8, 5])
error <- sum(myfitVCA_hypo$aov.tab[9, 5])
realValHypoVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)
### Randomisation
myrandomVCA <- function(df=PM_G2_mean_hypo){
randomDF = df
randomDF$G1_trt = sample(PM_G2_mean_hypo$G1_trt, replace = F)
randomDF$G2_trt = sample(PM_G2_mean_hypo$G2_trt, replace = F)
randomDF$brotherPairID = sample(PM_G2_mean_hypo$brotherPairID, replace = F)
myfitVCA <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = randomDF)
trtEffect <- sum(myfitVCA$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA$aov.tab[5:8, 5])
error <- sum(myfitVCA$aov.tab[9, 5])
return(data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error))
}
randomHypoVCA = do.call(rbind, lapply(1:1000, function(x) {
df=myrandomVCA(PM_G2_mean_hypo)
df$rep=x
return(df)}))
randomHypoVCA = melt(randomHypoVCA, id.vars = "rep")
# saveRDS(randomHypoVCA, file = "Rdata/randomHypoVCA.RDS")
randomHypoVCA <- readRDS(file = "Rdata/randomHypoVCA.RDS")
df2=reshape2::melt(realValHypoVCA)
sumDF <- randomHypoVCA %>%
group_by(variable) %>%
dplyr::summarize(value = mean(value)) %>% data.frame()
ggplot(randomHypoVCA, aes(x=variable, y=value))+
geom_boxplot()+
geom_jitter(width=.1, alpha=.2)+
geom_point(data = df2, col = "red", size = 6)+
geom_text(data=sumDF, aes(label=round(value)), col="white")+
geom_text(data = df2, aes(label=round(value)), col="white")+
theme_cleveland()+
ggtitle("VCA with bootstrap N=1000 at hypo-parDMS", subtitle = "red: observed values")
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
## Hyper
PM_G2_mean_hyper <- PM_G2[PM_G2$hypohyper %in% "hyper", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypermethylated upon infection"))
myfitVCA_hyper <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper)
### Real values
trtEffect <- sum(myfitVCA_hyper$aov.tab[2:4, 5])
genEffect <- sum(myfitVCA_hyper$aov.tab[5:8, 5])
error <- sum(myfitVCA_hyper$aov.tab[9, 5])
realValHyperVCA <- data.frame(trtEffect=trtEffect, genEffect=genEffect,error=error)
### Randomisation
randomHyperVCA = do.call(rbind, lapply(1:1000, function(x) {
df=myrandomVCA(PM_G2_mean_hyper)
df$rep=x
return(df)}))
randomHyperVCA = melt(randomHyperVCA, id.vars = "rep")
# saveRDS(randomHyperVCA, file = "Rdata/randomHyperVCA.RDS")
randomHyperVCA <- readRDS(file = "Rdata/randomHyperVCA.RDS")
df2=reshape2::melt(realValHyperVCA)
sumDF <- randomHyperVCA %>%
group_by(variable) %>%
dplyr::summarize(value = mean(value)) %>% data.frame()
ggplot(randomHyperVCA, aes(x=variable, y=value))+
geom_boxplot()+
geom_jitter(width=.1, alpha=.2)+
geom_point(data = df2, col = "red", size = 6)+
geom_text(data=sumDF, aes(label=round(value)), col="white")+
geom_text(data = df2, aes(label=round(value)), col="white")+
theme_cleveland()+
ggtitle("VCA with bootstrap N=1000 at hyper-parDMS", subtitle = "red: observed values")
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hypo, VarVC=TRUE)
################
## Hyper
PM_G2_mean_hyper <- PM_G2[PM_G2$hypohyper %in% "hyper", ] %>%
group_by(brotherPairID, G1_trt, G2_trt, ID) %>%
dplyr::summarize(MeanBetaValue = mean(BetaValue, na.rm=TRUE)) %>% data.frame()
varPlot(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper,
MeanLine=list(var=c("G1_trt", "G2_trt"),
col=c("white", "blue"), lwd=c(2,2)),
BG=list(var="G2_trt", col=paste0("gray", c(80, 90))),
YLabel=list(cex = .8, text="Mean beta value at parDMS \n hypermethylated upon infection"))
myfitVCA_hyper <- fitVCA(form = MeanBetaValue~(G1_trt* G2_trt*brotherPairID), Data = PM_G2_mean_hyper)
print(myfitVCA_hyper, digits=4)
# estimate 95% confidence intervals, request CI for
# all variance components via 'VarVC=TRUE'
VCAinference(myfitVCA_hyper, VarVC=TRUE)
##############
## In parents
##############
parmod <- lmer(data = PM_G1, BetaValue ~ meth.diff.parentals : Treatment + (1|CpGSite) + (1|brotherPairID))
## check normality of residuals assumption
qqnorm(resid(parmod))
qqline(resid(parmod))
pred <- ggpredict(parmod, terms = c("meth.diff.parentals", "Treatment"))
plot(pred, add.data = T)+
scale_color_manual(values = c("black", "red"))+
scale_y_continuous(name = "Beta values")+
scale_x_continuous(name = "Methylation difference between infected and control parents in percentage")+
ggtitle("Predicted methylation ratio (Beta) values in parents\n as a function of differential methylation between exposed and control groups")+
theme_bw()
##############
## Linear model: does the beta value of offspring at DMS depends on treatment Parent x Offspring?
##############
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2, REML = F) # REML =F for model comparison
mod_noG1trt <- lmer(BetaValue ~ G2_trt:hypohyper + (1|CpGSite)+ (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noG2trt <-lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noInteractions <- lmer(BetaValue ~ (G1_trt + G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
mod_noHypoHyper <- lmer(BetaValue ~ (G1_trt * G2_trt) + (1|CpGSite) + (1|Sex) + (1|brotherPairID), data = PM_G2, REML = F)
## check normality of residuals assumption
qqnorm(resid(modFull))
qqline(resid(modFull))
## Likelihood ratio tests for all variables:
lrtest(modFull, mod_noG1trt) # G1 trt is VERY VERY significant (LRT: χ² (4) = 1163.6, p < 0.001)
lrtest(modFull, mod_noG2trt) # G2 trt is VERY VERY significant (LRT: χ² (4) = 30.02, p < 0.001) NB that changed when brotherpair is used instead of family!
lrtest(modFull, mod_noInteractions) # interactions are significant (LRT: χ² (2) = 9.21, p < 0.01)
lrtest(modFull, mod_noHypoHyper) # hypo/hyper VERY VERY significant (LRT: χ² (4) = 1140, p < 0.001)
## Post-hoc tests between treatments
modFull <- lmer(BetaValue ~ (G1_trt * G2_trt):hypohyper + (1|CpGSite) + (1|Sex) + (1|brotherPairID),data = PM_G2)
modFull_emmeans <- emmeans(modFull, list(pairwise ~ (G1_trt:G2_trt):hypohyper), adjust = "tukey")
modFull_emmeans
P1 <- plot(modFull_emmeans, by = "hypohyper", comparisons = TRUE) +
# coord_flip()+
theme_bw() +
ggtitle("Estimated marginal means of methylation ratio (beta)\n of offspring at parental DMS")+
theme(legend.position = "none", axis.title.x = element_blank()) +
scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))
## NB: Comparison arrows: https://cran.r-project.org/web/packages/emmeans/vignettes/xplanations.html
## two estimated marginal means (EMMs) differ significantly if, and only if, their respective comparison arrows do not overlap
## These comparison arrows are decidedly not the same as confidence intervals.
## Confidence intervals for EMMs are based on the statistical properties of the individual EMMs, whereas comparison arrows
## are based on the statistical properties of differences of EMMs.
## Add the PARENTAL DMS value
## Same test on ALL, G1 and G2 fish
modFullG1 <- lmer(BetaValue ~ G1_trt:hypohyper + (1|CpGSite) + (1|brotherPairID), data = PM_G1)
modFullG1_emmeans <- emmeans(modFullG1, list(pairwise ~ G1_trt:hypohyper), adjust = "tukey")
modFullG1_emmeans
P2 <- plot(modFullG1_emmeans, by = "hypohyper", comparisons = TRUE) +
theme_bw() +
ggtitle("Estimated marginal means of methylation ratio (beta)\n of parents at DMS")+
theme(legend.position = "none", axis.title.x = element_blank()) +
scale_x_continuous("Beta value (methylation ratio)", limits = c(47,69.5))
ggarrange(P2, P1, labels = c("A", "B"), ncol = 1, nrow = 2)
#####################################################################################
## Are the nbr of residuals methylation AT PARENTAL DMS different in the 4 G2 trt? ##
## (for hypo vs hypermeth)? ##
##############################
length(unique(PM_G1$CpGSite))# 3648 positions
PM_G1 %>% dplyr::count(ID)## NB: not all covered in all samples
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hypo"]))# 1176 positions hypomethylated upon parental inf
length(unique(PM_G2$CpGSite[PM_G2$hypohyper %in% "hyper"]))# 2472 positions hypermethylated upon parental inf
myfun <- function(X){
## Calculate nbr of CpG hypo/hypermethylated per individual, and nbr of covered CpG:
X <- X %>% group_by(ID, Treatment, brotherPairID, clutch.ID, Sex) %>%
dplyr::summarise(ncov = n(),
hypoMeth = sum(BetaValue < 0.3),
hyperMeth = sum(BetaValue > 0.7)) %>% data.frame()
# Calculate residuals of nbr of methCpG by nbr of covered CpG
X$res_Nbr_methCpG_Nbr_coveredCpG_HYPO = residuals(lm(X$hypoMeth ~ X$ncov))
X$res_Nbr_methCpG_Nbr_coveredCpG_HYPER = residuals(lm(X$hyperMeth ~ X$ncov))
## Statistical model: is it different in each offspring trt group?
mod1 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
mod0 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
print(lrtest(mod1, mod0))
## Post-hoc test:
modhypo <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPO ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X)
## pairwise posthoc test
print(emmeans(modhypo, list(pairwise ~ Treatment), adjust = "tukey"))
mod3 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
mod4 <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ 1 + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X, REML = F)
print(lrtest(mod3, mod4))
## Post-hoc test:
modhyper <- lmer(res_Nbr_methCpG_Nbr_coveredCpG_HYPER ~ Treatment + (1|brotherPairID/clutch.ID) + (1|Sex),
data = X)
## pairwise posthoc test
print(emmeans(modhyper, list(pairwise ~ Treatment), adjust = "tukey"))
## Plot
phypo <- plot(ggpredict(modhypo, terms = c("Treatment")), add.data = TRUE)+
scale_y_continuous("Residuals of number of hypomethylated methylated \ncytosines on number of cytosines covered") +
ggtitle("Predicted residuals nbr of hypomethylated CpG")+
theme_bw()
phyper <- plot(ggpredict(modhyper, terms = c("Treatment")), add.data = TRUE)+
scale_y_continuous("Residuals of number of hypermethylated methylated \n cytosines on number of cytosines covered") +
ggtitle("Predicted residuals nbr of hypermethylated CpG")+
theme_bw()
return(list(phypo, phyper))
}
listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hypo",])
## NOT significant
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
top = text_grob("Parental DMS hypomethylated upon infection, in offspring"))
listplots <- myfun(X = PM_G2[PM_G2$hypohyper %in% "hyper",])
## Treatment SIGNIFICANT in both excess hypo/hyper methylation **
# $`pairwise differences of Treatment`
## HYPO
# 1 estimate SE df t.ratio p.value
# NE_control - E_control 23.71 7.28 10.3 3.257 0.0353
# NE_control - E_exposed 26.88 7.26 10.3 3.701 0.0172
## HYPER
# 1 estimate SE df t.ratio p.value
# NE_control - E_control -24.06 7.36 10.3 -3.269 0.0348
# NE_control - E_exposed -27.07 7.34 10.3 -3.687 0.0177
annotate_figure(ggarrange(listplots[[1]], listplots[[2]],ncol = 2, nrow = 1),
top = text_grob("Parental DMS hypermethylated upon infection, in offspring"))
## The beta values in parentalDMS in offspring follow the parental pattern hypo/hyper methylated upon infection
######################################################################################
## II. CORE DMS: Which of the parental DMS are also found in offspring comparisons? ##
######################################################################################
intersect(paste(DMSlist$DMS_15pc_G1_C_T$chr, DMSlist$DMS_15pc_G1_C_T$start),
intersect(paste(DMSlist$DMS_15pc_CC_CT$chr, DMSlist$DMS_15pc_CC_CT$start),
paste(DMSlist$DMS_15pc_TC_TT$chr, DMSlist$DMS_15pc_TC_TT$start)))
## ONLY 4!!! "Gy_chrII 22196179" "Gy_chrXII 10863858" "Gy_chrXVII 2658079" "Gy_chrXX 5344222"
###############################################################
## CORE DMS: Which of the DMS are common to CC-CI and IC-II? ##
###############################################################
makeManP <- function(comp1, comp2){
A <- methylKit::select(DMS1, which(paste(DMS1$chr, DMS1$start) %in% coreDMS))
B <- as.data.frame(annotateWithGeneParts(as(A,"GRanges"),annotBed12)@members)
A2 <- methylKit::select(DMS2,
which(paste(DMS2$chr, DMS2$start) %in% coreDMS))
B2 <- as.data.frame(annotateWithGeneParts(as(A2,"GRanges"),annotBed12)@members)
ggarrange(
makeManhattanPlots(DMSfile = DMS1,
annotFile = as.data.frame(annotateWithGeneParts(as(DMS1,"GRanges"),annotBed12)@members),
GYgynogff = GYgynogff, mycols = c("red", "grey", "black", "green"),
mytitle = paste0("Manhattan plot of ", comp1, " DMS")),
makeManhattanPlots(DMSfile = DMS2,
annotFile = as.data.frame(annotateWithGeneParts(as(DMS2,"GRanges"),annotBed12)@members),
GYgynogff = GYgynogff, mycols = c("red", "grey", "black", "green"),
mytitle = paste0("Manhattan plot of ", comp2, " DMS")),
makeManhattanPlots(DMSfile = A, annotFile = B, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = paste0("Manhattan plot core DMS in ", comp1)),
makeManhattanPlots(DMSfile = A2, annotFile = B2, GYgynogff = GYgynogff,
mycols = c("red", "grey", "black", "green"), mytitle = paste0("Manhattan plot core DMS in ", comp2)),
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4, common.legend = T)
}
DMS1 = DMSlist$DMS_15pc_CC_CT # from: getDiffMeth(myuniteCov = uniteCov14_G2_woSexAndUnknowChr_G1CONTROL, myMetadata = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% c(5,6),])
DMS2 = DMSlist$DMS_15pc_TC_TT # from: getDiffMeth(myuniteCov = uniteCov14_G2_woSexAndUnknowChr_G1INFECTED, myMetadata = fullMetadata_OFFS[fullMetadata_OFFS$trtG1G2_NUM %in% c(2,3),])
coreDMS <- intersect(paste(DMS1$chr, DMS1$start), paste(DMS2$chr, DMS2$start))
## Make Manhattan plot:
makeManP(comp1 = "CC-CI", comp2 = "IC-II")
## Annotation of the core DMS:
coreDMSmethylDiff <- methylKit::select(DMS1, which(paste(DMS1$chr, DMS1$start) %in% coreDMS))
## Differentially methylated sites:
subGOterms = getAnnotationFun(METHOBJ = coreDMSmethylDiff)
## Background annotations:
universeGOterms = getAnnotationFun(METHOBJ = uniteCov14_G2_woSexAndUnknowChrOVERLAP)
length(universeGOterms)# 16024
# as.vector(lapply(strsplit(as.character(TSSAssociation_DiffMeth15p$feature.name), "\\."), "[", 1))
# Using the genes with associated pop-DMS, we applied
# a conditional hypergeometric Gene Ontology (GO) term enrichment
# analysis (false discovery rate–corrected P ≤ 0.05) with the Ensembl
# stickleback annotation dataset “gaculeatus_gene_ensembl,” and all
# genes that were associated to any sequenced CpG site were used as
# universe. We identified overrepresented biological processes, molec-
# ular functions, and cellular components using the packages GOstats
# version 2.5 (59) and GSEABase version 1.46 (60) and corrected for
# multiple testing using the false discovery rate method implemented
# in goEnrichment version 1.0 (61) in R version 3.6 (52). Figures were
# produced using ggplot2 version 3.2 (56)
#######################################################################
## TRANSMITTED DMS: Which of the DMS are found in CCvsIC and CIvsII? ##
#######################################################################
makeManP(DMS1 = DMS15pc_G1_controlG2_half, comp1 = "CC-IC",
DMS2 = DMS15pc_G1_infectedG2_half, comp2 = "CI-II")
##############
## Plot mean Beta values of offsprings at parental DMS, per trt, along the chromosomes
##############
meanBeta_G2_simple <- PM_G2 %>% group_by(Chr, Pos, Treatment) %>%
dplyr::summarize(Mean = mean(BetaValue, na.rm=TRUE))
names(meanBeta_G2_simple) <- c("Chr", "Pos", "Treatment_G2", "MeanBetaG2")
genome <- GYgynogff %>%
mutate(chrom_nr=chrom %>% deroman(), chrom_order=factor(chrom_nr) %>% as.numeric()) %>%
arrange(chrom_order) %>%
mutate(gstart=lag(length,default=0) %>% cumsum(), gend=gstart+length,
type=LETTERS[2-(chrom_order%%2)], gmid=(gstart+gend)/2)
mydata = tibble(trt=meanBeta_G2_simple$Treatment_G2,
chrom=meanBeta_G2_simple$Chr, pos=meanBeta_G2_simple$Pos, beta=meanBeta_G2_simple$MeanBetaG2)
mydata$pos <- as.numeric(mydata$pos)
mydata$pos <- as.numeric(mydata$pos)
table(mydata$chrom)## check that chrXIX and chrUN are well removed!!
# join DMS and genomic position
data = dplyr::left_join(mydata, genome) %>% dplyr::mutate(gpos=pos+gstart)
# plot:
ggplot()+
geom_rect(data=genome,aes(xmin=gstart,xmax=gend,ymin=-Inf,ymax=Inf,fill=type), alpha=.2)+
geom_point(data=data, aes(x=gpos,y=beta, shape=trt, col=trt),fill="white", size = 1)+
scale_color_manual(values = colOffs)+
scale_shape_manual(values=c(21,21,21,21))+
scale_fill_manual(values=c(A=rgb(.9,.9,.9),B=NA),guide="none")+
scale_x_continuous(breaks=genome$gmid,labels=genome$chrom %>% str_remove(.,"Gy_chr"),
position = "top",expand = c(0,0))+
theme_minimal()+
theme(panel.grid = element_blank(),
axis.line=element_blank(),
axis.title = element_blank(),
strip.placement = "outside")+
facet_grid(trt~.)+
ggtitle("Mean methylation proportions at the 3648 parental DMS for each offspring group")
###########################################################################################
## Not conclusive: test hyp that beta value is different (higher?) in the center of the chromosome,
## where there are less recombinations. Test for each chromosome
meanBeta_G2_extended <- PM_G2 %>% group_by(Chr, Pos, Treatment, Sex, brotherPairID) %>%
dplyr::summarize(Mean = mean(BetaValue, na.rm=TRUE))
names(meanBeta_G2_extended) <- c("Chr", "Pos", "Treatment_G2","Sex", "brotherPairID", "MeanBetaG2")
mydata = tibble(trt=meanBeta_G2_extended$Treatment_G2, sex = meanBeta_G2_extended$Sex, brotherPairID = meanBeta_G2_extended$brotherPairID,
chrom=meanBeta_G2_extended$Chr, pos=meanBeta_G2_extended$Pos, beta=meanBeta_G2_extended$MeanBetaG2)
mydata$pos <- as.numeric(mydata$pos)
# join DMS and genomic position
data = dplyr::left_join(mydata, genome) %>% dplyr::mutate(gpos=pos+gstart)
## Add distance to center
data$dist2mid <- abs(data$gmid - data$gpos)
mod <- lmer(beta ~ dist2mid:chrom + (1|trt) + (1|sex) + (1|brotherPairID), data = data, REML = F)
mod0 <- lmer(beta ~ dist2mid + (1|trt) + (1|sex) + (1|brotherPairID), data = data, REML = F)
mod00 <- lmer(beta ~ chrom + (1|trt) + (1|sex) + (1|brotherPairID), data = data, REML = F)
lrtest(mod, mod0) # chromosome matters
lrtest(mod0, mod00) # distance to middle matters
## check normality of residuals assumption
qqnorm(resid(mod))
qqline(resid(mod)) # quite skewed
pred <- ggpredict(mod, terms = c("dist2mid","chrom"))
plot(pred) +
scale_color_manual(values = 1:20)